library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages -------------------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5 v purrr 0.3.4
v tibble 3.1.2 v dplyr 1.0.7
v tidyr 1.1.3 v stringr 1.4.0
v readr 2.0.0 v forcats 0.5.1
-- Conflicts ----------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(janitor)
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
library(here)
here() starts at E:/Prathiba_Course/_codeclan/my_projects/phs_covid/analysis
library(ggplot2)
library(lubridate)
Attaching package: ‘lubridate’
The following objects are masked from ‘package:base’:
date, intersect, setdiff, union
library(plotly)
Warning: package ‘plotly’ was built under R version 4.1.1
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(fable)
Warning: package ‘fable’ was built under R version 4.1.1
Loading required package: fabletools
Warning: package ‘fabletools’ was built under R version 4.1.1
library(tsibble)
Warning: package ‘tsibble’ was built under R version 4.1.1
Attaching package: ‘tsibble’
The following object is masked from ‘package:lubridate’:
interval
The following objects are masked from ‘package:base’:
intersect, setdiff, union
library(tsibbledata)
Warning: package ‘tsibbledata’ was built under R version 4.1.1
## ---------------------------------------------------------------------------
#Load all the datafiles into dataset using loop
## ---------------------------------------------------------------------------
## List the files.
setwd("../raw_data")
Warning: The working directory was changed to E:/Prathiba_Course/_codeclan/my_projects/phs_covid/raw_data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
files <- list.files(pattern=".csv")
## read data using loop
for (f in files) {
file_name <- str_to_lower(str_replace(f,".csv",""))
assign(paste(file_name),read_csv(f))
}
Rows: 25620 Columns: 18
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (8): Country, Sex, SexQF, AgeGroup, AgeGroupQF, Dose, PercentCoverageQF, CumulativePercentCoverageQF
dbl (6): Date, Population, NumberVaccinated, PercentCoverage, CumulativeNumberVaccinated, CumulativePercentCove...
lgl (4): PopulationQF, DoseQF, NumberVaccinatedQF, CumulativeNumberVaccinatedQF
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 419070 Columns: 20
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (10): HB, HBQF, HBName, Sex, SexQF, AgeGroup, AgeGroupQF, Dose, PercentCoverageQF, CumulativePercentCoverageQF
dbl (6): Date, Population, NumberVaccinated, PercentCoverage, CumulativeNumberVaccinated, CumulativePercentCov...
lgl (4): PopulationQF, DoseQF, NumberVaccinatedQF, CumulativeNumberVaccinatedQF
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 865590 Columns: 20
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (9): CA, CAName, Sex, SexQF, AgeGroup, AgeGroupQF, Dose, PercentCoverageQF, CumulativePercentCoverageQF
dbl (6): Date, Population, NumberVaccinated, PercentCoverage, CumulativeNumberVaccinated, CumulativePercentCove...
lgl (5): CAQF, PopulationQF, DoseQF, NumberVaccinatedQF, CumulativeNumberVaccinatedQF
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 4270 Columns: 10
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (4): Country, Product, Dose, AgeBand
dbl (6): Date, Population, NumberVaccinated, PercentCoverage, CumulativeNumberVaccinated, CumulativePercentCove...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 32 Columns: 15
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (2): CA, CAName
dbl (13): Date, TotalTests, TotalTestsNew, PositiveTests, PositiveTestsNew, PositivePercentageTotal, PositivePe...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 15 Columns: 15
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (2): HB, HBName
dbl (13): Date, TotalTests, TotalTestsNew, PositiveTests, PositiveTestsNew, PositivePercentageTotal, PositivePe...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 19403 Columns: 14
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (5): Country, Sex, SexQF, AgeGroup, AgeGroupQF
dbl (9): Date, DailyPositive, CumulativePositive, CrudeRatePositive, DailyDeaths, CumulativeDeaths, CrudeRateDe...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 18807 Columns: 21
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (2): CA, CAName
dbl (19): Date, DailyPositive, CumulativePositive, CrudeRatePositive, CrudeRate7DayPositive, DailyDeaths, Cumul...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 8811 Columns: 25
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (4): HB, HBName, HospitalAdmissionsQF, ICUAdmissionsQF
dbl (21): Date, DailyPositive, CumulativePositive, CrudeRatePositive, CrudeRate7DayPositive, DailyDeaths, Cumul...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 658923 Columns: 10
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (7): IntZone, IntZoneName, CA, CAName, Positive7DayQF, CrudeRate7DayPositive, CrudeRate7DayPositiveQF
dbl (3): Date, Positive7Day, Population
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2940 Columns: 11
-- Column specification -------------------------------------------------------------------------------------------
Delimiter: ","
chr (1): Country
dbl (10): Date, SIMDQuintile, DailyPositive, CumulativePositive, CrudeRatePositive, DailyDeaths, CumulativeDeat...
i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Function to create a theme for the plot-----------------------------------------
color_theme <- function() {
theme(
plot.background = element_rect(fill = "white"),
plot.title = element_text(size = rel(2)),
plot.title.position = "plot",
panel.border = element_rect(colour = "blue", fill = NA, linetype = 1),
panel.background = element_rect(fill = "white"),
panel.grid = element_line(colour = "grey85", linetype = 1, size = 0.5),
axis.text = element_text(colour = "blue", face = "italic", size = 12),
axis.title.y = element_text(colour = "#1B732B", size = 10, angle = 90),
axis.title.x = element_text(colour = "#1B732B", size = 10),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6)
)
}
trend_hb_daily <- trend_hb_20211008 %>%
clean_names()
#Convert the date to date format
trend_hb_daily <- trend_hb_daily %>%
mutate(date = as_date(ymd(date))) %>%
filter (year(date) == 2021)
#filter(date >="2021-04-01")
#Plot to visualise trend on positive cases.
x <- trend_hb_daily %>%
filter (hb_name == "Scotland") %>%
ggplot()+
aes(x = date, y = daily_positive)+
geom_line()+
#scale_x_date(breaks = "1 month")+
scale_x_date(breaks = "1 month", date_labels = "%b - %y" )+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
ggtitle("Trend on Positive cases") +
xlab("Year") +
ylab("No of Positive Cases") +
color_theme()
ggplotly(x)
NA
NA
x <- trend_hb_daily %>%
filter(date >="2021-04-01") %>%
filter (hb_name == "Scotland") %>%
ggplot()+
aes(x = date, y = daily_positive)+
geom_line()+
#scale_x_date(breaks = "1 month")+
scale_x_date(breaks = "1 month", date_labels = "%b - %y" )+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
ggtitle("People tested Positive") +
xlab("Year") +
ylab("No of Positive Cases") +
color_theme()
ggplotly(x)
trend <- trend_hb_daily %>%
filter (hb_name == "Scotland") %>%
filter(date >="2021-06-01") %>%
select(date, daily_positive)
trend <- as_tsibble(trend, index = date)
fit <- trend %>%
model(
snaive = SNAIVE(daily_positive),
mean_model = MEAN(daily_positive),
arima = ARIMA(daily_positive)
)
forecast_14days <- fit %>%
fabletools::forecast(h = 14)
forecast_14days
forecast_14days %>%
#filter(.model == "snaive") %>%
autoplot(trend, level = NULL) +
ggtitle("Forecasts for Positive cases for 2 weeks") +
xlab("Year") +
ylab("No of Positive Cases") +
guides(colour = guide_legend(title = "Forecast"))+
scale_x_date(breaks = "1 month", date_labels = "%B" )+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
forecast_1month <- fit %>%
fabletools::forecast(h = "1 month")
forecast_1month
forecast_1month %>%
#filter(.model == "snaive") %>%
autoplot(trend, level = NULL) +
ggtitle("Forecasts for Positive cases for one month") +
xlab("Year") +
ylab("No of Positive Cases") +
guides(colour = guide_legend(title = "Forecast"))+
scale_x_date(breaks = "1 month", date_labels = "%B" )+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
# check our available years so we know where to put the split in the data
# Now set our training data from 1992 to 2006
train <- trend %>%
filter_index("2021-01-01" ~ "2021-08-31")
# run the model on the training set
fit_train <- train %>%
model(
mean_model = MEAN(daily_positive),
arima = ARIMA(daily_positive),
snaive = SNAIVE(daily_positive)
)
forecast_test <- fit_train %>%
fabletools::forecast(h = 30)
forecast_test %>%
autoplot(train, level = NULL) +
autolayer(filter_index(trend, "2021-09-01" ~.), color = "black") +
ggtitle("Forecasts for positive cases") +
xlab("Date") + ylab("Daily Positive") +
guides(colour=guide_legend(title="Forecast"))
Plot variable not specified, automatically selected `.vars = daily_positive`
accuracy_model <- fabletools::accuracy(forecast_test, trend)
accuracy_model %>%
select(-.type) %>%
arrange(RMSE)
plot_hosp <- trend_hb_daily %>%
filter(date >="2021-04-01") %>%
filter (hb_name == "Scotland") %>%
filter(!(is.na(hospital_admissions))) %>%
ggplot()+
aes(x = date, y = hospital_admissions)+
geom_col()+
#scale_x_date(breaks = "1 month")+
scale_x_date(breaks = "1 week", date_labels = "%d - %b" )+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
ggtitle("Trend on Hospitalisations") +
xlab("Date (months)") +
ylab("No of patients admitted") +
color_theme()
ggplotly(plot_hosp)
NA
Using 6 months of data ( Two - Wave)
forecast_14days %>%
# filter(.model == "snaive") %>%
autoplot(trend_hosp, level = NULL) +
ggtitle("Forecasts for Hospital admissions cases for 2 weeks") +
xlab("Year") +
ylab("No of patients admitted") +
guides(colour = guide_legend(title = "Forecast"))+
scale_x_date(breaks = "1 month", date_labels = "%B" )+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
NA
NA
NA
NA
forecast_1month <- fit %>%
fabletools::forecast(h = "1 month")
forecast_1month
forecast_1month %>%
filter(.model %in% c("snaive","arima")) %>%
autoplot(trend_hosp, level = NULL) +
ggtitle("Forecasts for Hospitalisation for one month") +
xlab("Year") +
ylab("No of Positive Cases") +
guides(colour = guide_legend(title = "Forecast"))+
scale_x_date(breaks = "1 month", date_labels = "%B" )+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
# set the training data
train <- trend_hosp %>%
filter_index("2021-06-01" ~ "2021-08-31")
# run the model on the training set
fit_train <- train %>%
model(
mean_model = MEAN(hospital_admissions),
arima = ARIMA(hospital_admissions),
snaive = SNAIVE(hospital_admissions),
ets = ETS(log(hospital_admissions) ~ error("M") + trend("Ad") + season("A"))
)
forecast_test <- fit_train %>%
fabletools::forecast(h = 35)
forecast_test %>%
filter(.model %in% c("arima","snaive")) %>%
autoplot(train ) +
autolayer(filter_index(trend_hosp, "2021-06-01" ~.), color = "black") +
ggtitle("Forecasts for Hospitalisations") +
facet_wrap(~.model)+
xlab("Date") +
ylab("No of Patients admitted") +
guides(colour=guide_legend(title="Forecast"))
Plot variable not specified, automatically selected `.vars = hospital_admissions`
accuracy_model <- fabletools::accuracy(forecast_test, trend_hosp)
Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
1 observation is missing at 2021-10-05
accuracy_model %>%
select(-.type) %>%
arrange(RMSE)
Prepare the data
daily_vacc_hb <- daily_vacc_hb_20211009 %>%
clean_names()
#Convert the date to date format
daily_vacc_hb <- daily_vacc_hb %>%
mutate(date = as_date(ymd(date)))
#filter (year(date) == 2021)
daily_vacc_hb_plot <- daily_vacc_hb %>%
filter(hb_name == "Scotland") %>%
filter(sex =="Total") %>%
filter(age_group == "All vaccinations") %>%
filter(number_vaccinated!=0)
#select(date,sex, age_group, number_vaccinated)
#Plot to visualise trend on positive cases.
plot_vaccine <- daily_vacc_hb_plot %>%
ggplot()+
aes(x = date, y = number_vaccinated)+
geom_line(aes(color = dose))+
facet_wrap(~dose)+
#scale_x_date(breaks = "1 month")+
scale_x_date(breaks = "1 month", date_labels = "%b - %y" )+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
ggtitle("Trend on Vaccination") +
xlab("Year") +
ylab("No of Positive Cases") +
color_theme()
ggplotly(plot_vaccine)